# Transparent Corruption: The effect of illicit connections and trusted references 
# on the demand for bureaucratic intermediation

# Authors: Jose Ramon Morales-Arilla & Ana Gabriela Ibarra Luces

# SCRIPT 02 - REGRESSION ANALYSIS


# Filtering finished surveys and consenting participants ------------------

# Main regressions dataset
reg_table <- tabla_survey %>% 
   dplyr::filter(Finished == 1 & consent == 1)

# Qualtrics' quality regressions dataset
qualt_table <- tabla_survey %>% 
   filter(Q_RelevantIDDuplicateScore < 75) %>%
   filter(Finished == 1 & consent == 1)

# Qualtrics' quality heterogeneity dataset
qualt_heter_table <- tabla_survey %>% 
   
   transform(gender = as.numeric(gender), 
             univ_career = as.numeric(univ_career), 
             current_semest = as.numeric(current_semest), 
             acad_rank = as.numeric(acad_rank), 
             m_house_income = as.numeric(m_house_income), 
             survey_chann = as.numeric(survey_chann)) %>% 
   
   # Creating binary variables and categorical variables
   dplyr::mutate(
      
      # Binary of male (1) and female or others (0)
      gender = case_when(gender == 2 ~ 1, 
                         gender == 1 ~ 0,
                         gender == 3 ~ 0, 
                         T ~ gender),
      
      # Binary of good previous experience with gestores (1) and bad or neutral experience (0)
      gest_exp = case_when(gest_exp == 1 ~ 1,
                           gest_exp == 2 ~ 0,
                           gest_exp ==3 ~ 0, 
                           T ~ gest_exp),
      
      # Binary of those in early stage of their career (0) and late stage (1) 
      current_semest = case_when(current_semest < 6 ~ 0, 
                                 current_semest > 5 ~ 1, 
                                 T ~ current_semest),
      
      # Binary of those with high GPA (1) and low GPA (0)
      acad_rank = case_when(acad_rank < 4 ~ 1,
                            acad_rank > 3 ~ 0,
                            T ~ acad_rank),
      
      # Binary of high monthly household income (1) and low household income (0)
      m_house_income = case_when(m_house_income < 14 ~ 0, 
                                 m_house_income > 13 ~ 1, 
                                 T ~ m_house_income),
      
      # Categorical variable for faculty
      univ_career = case_when(univ_career == 8 | univ_career == 9 | univ_career == 10 | univ_career == 11 ~ 1, # Engineering 
                              univ_career ==1 | univ_career== 2 | univ_career == 5 | univ_career == 14 | univ_career== 15 ~ 2, # Social science faculty 
                              univ_career == 3 ~ 3, # Communication Management
                              univ_career == 6 | univ_career == 7 | univ_career == 12 | univ_career== 13 ~ 4, # Humanities
                              univ_career == 4 | univ_career == 16 | univ_career == 17 ~ 5, # Law, Architecture, Other
                              T ~ univ_career),
      
      # Start year of university. 
      univ_start_yr = case_when(univ_start_yr < 2019 ~ 1, # Before 2019
                                univ_start_yr > 2018 ~ 0), # After 2018
      
      ingen = ifelse(univ_career == 1, 1, 0), # Engineering 
      
      faces = ifelse(univ_career == 2, 1, 0), # Social science faculty
      
      comsoc = ifelse(univ_career == 3, 1, 0),  # Communication Management
      
      human = ifelse(univ_career == 4, 1, 0), # Humanities
      
      derech_otr = ifelse(univ_career == 5, 1, 0) # Law, Architecture, Other
      
   )%>%
   
   # Filter by Qualtric's quality variable
   dplyr::filter(Q_RelevantIDDuplicateScore < 75) %>%
   
   # Filter finished observations and observations with affirmative consent
   dplyr::filter(Finished == 1 & consent == 1)

# Table 1 - Effect of corruption suggestions on take-up -------------------

# Regression 1
reg_1_c <- feols(Y ~ Sug_Corr, reg_table, vcov = "hetero")

# Regression 2
reg_2_c <- feols(Y ~ Sug_Corr + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")
# Regression 3
reg_3_c <- feols(Y ~ Sug_Corr + Precio + Exper + Rapidez + GestLink, reg_table, vcov = "hetero")

# Regression 4
reg_4_c <- feols(Y ~ Sug_Corr + Precio + Exper + Rapidez + GestLink + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")

# Table 1 
tab_1 <- huxreg(reg_1_c, reg_2_c, reg_3_c, reg_4_c,
                        statistics = c("N. obs." = "nobs", 
                                       "R squared" = "r.squared", 
                                       "F statistic" = "statistic",
                                       "P value" = "p.value"), 
                        coefs = c("(Intercept)", 
                                  "Corruption" = "Sug_Corr", 
                                  "Price" = "Precio", 
                                  "Experience" = "Exper", 
                                  "Speed" = "Rapidez", 
                                  "Reference" =  "GestLink"), 
                        stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 13)  


print(tab_1)

# Table 2 - Effect of corruption suggestions on price elasticity --------------

# Regression 5
reg_5_c <- feols(Y ~ Sug_Corr*Precio, reg_table, vcov = "hetero")

# Regression 6 
reg_6_c <- feols(Y ~ Sug_Corr*Precio + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")

# Regression 7
reg_7_c <-  feols(Y ~ Sug_Corr*Precio + Precio*Exper + Precio*Rapidez + Precio*GestLink,
                  reg_table, vcov = "hetero")

# Regression 8 
reg_8_c <- feols(Y ~ Sug_Corr*Precio + Precio*Exper + Precio*Rapidez + Precio*GestLink + (gender) +
                    age + (univ_career) + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")

# Table 2
tab_2 <- huxreg("(1)" = reg_5_c, "(2)" = reg_6_c, "(3)" = reg_7_c, "(4)" = reg_8_c,
                        statistics = c("N. obs." = "nobs", 
                                       "R squared" = "r.squared", 
                                       "F statistic" = "statistic",
                                       "P value" = "p.value",
                                       'RMSE' = "rmse"), 
                        coefs = c("(Intercept)", 
                                  "Corruption" = "Sug_Corr", 
                                  "Price" = "Precio", 
                                  "Corruption x Price" = "Sug_Corr:Precio"),
                        stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9)%>% 
   
   huxtable::insert_row(c("Other treatments", "No", "No", "Yes", "Yes"), after = 10) 

print(tab_2)


# Table 3 - Effect of corruption suggestions and trusted reference --------

# Regression 9
reg_9_c <- feols(Y ~ Sug_Corr*GestLink, reg_table, vcov = "hetero")

# Regression 10
reg_10_c <- feols(Y ~ Sug_Corr*GestLink + (gender) + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , reg_table, vcov = "hetero")

# Regression 11
reg_11_c <-  feols(Y ~ Sug_Corr*GestLink + GestLink*Precio  + GestLink*Rapidez + GestLink*Exper , reg_table, vcov = "hetero")

# Regression 12
reg_12_c <- feols(Y ~ Sug_Corr*GestLink + GestLink*Precio  + GestLink*Rapidez + GestLink*Exper +gender + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , reg_table, vcov = "hetero")

tab_3 <- huxreg("(1)" =reg_9_c, "(2)" =reg_10_c, "(3)" =reg_11_c, "(4)" =reg_12_c,
                        statistics = c("N. obs." = "nobs", 
                                       "R squared" = "r.squared", "F statistic" = "statistic",
                                       "P value" = "p.value"), 
                        coefs = c("(Intercept)", 
                                  'Corruption' = "Sug_Corr",
                                  "Reference"="GestLink", 
                                  "Corruption x Reference" = "Sug_Corr:GestLink"), 
                        stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9) %>% 
   huxtable::insert_row(c("Other treatments", "No", "No", "Yes", "Yes"), after = 10)

print(tab_3)

# Table 4 -  Demand price elasticity and intermediaries referred by trusted individuals --------

reg_1_s <- feols(Y ~ GestLink*Precio, reg_table, vcov = "hetero")

reg_2_s <- feols(Y ~ GestLink*Precio + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")

reg_3_s <-  feols(Y ~ GestLink*Precio + GestLink*Exper + GestLink*Rapidez + GestLink*Sug_Corr , reg_table, vcov = "hetero")

reg_4_s <- feols(Y ~ GestLink*Precio + GestLink*Exper + GestLink*Rapidez + GestLink*Sug_Corr + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , reg_table, vcov = "hetero")

tab_4 <- huxreg(reg_1_s, reg_2_s, reg_3_s, reg_4_s,
                     statistics = c("N. obs." = "nobs", 
                                    "R squared" = "r.squared", 
                                    "F statistic" = "statistic",
                                    "P value" = "p.value"), 
                     coefs = c("(Intercept)", 
                               "Price" = "Precio", 
                               "Reference" ="GestLink",
                               "Price x Reference" = "GestLink:Precio"),
                     stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9)%>% 
   huxtable::insert_row(c("Other treatments", "No", "No", "Yes", "Yes"), after = 10)

print(tab_4)

# Table A.5 - Effect of corruption suggestions on take-up - Qualtrics’ Quality Filter Sample --------

# Regression 1
reg_1_q <- feols(Y ~ Sug_Corr, qualt_table, vcov = "hetero")

# Regression 2
reg_2_q <- feols(Y ~ Sug_Corr + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , qualt_table, vcov = "hetero")
# Regression 3
reg_3_q <- feols(Y ~ Sug_Corr + Precio + Exper + Rapidez + GestLink, qualt_table, vcov = "hetero")

# Regression 4
reg_4_q <- feols(Y ~ Sug_Corr + Precio + Exper + Rapidez + GestLink + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , qualt_table, vcov = "hetero")

# Table 1 
tab_1_q <- huxreg(reg_1_q, reg_2_q, reg_3_q, reg_4_q,
                statistics = c("N. obs." = "nobs", 
                               "R squared" = "r.squared", 
                               "F statistic" = "statistic",
                               "P value" = "p.value"), 
                coefs = c("(Intercept)", 
                          "Corruption" = "Sug_Corr", 
                          "Price" = "Precio", 
                          "Experience" = "Exper", 
                          "Speed" = "Rapidez", 
                          "Reference" =  "GestLink"), 
                stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 13)  


print(tab_1_q)


# Table A.6 - Effect of corruption suggestions on price elasticity - Qualtrics’ Quality Filter Sample --------------

# Regression 5
reg_5_q <- feols(Y ~ Sug_Corr*Precio, qualt_table, vcov = "hetero")

# Regression 6 
reg_6_q <- feols(Y ~ Sug_Corr*Precio + (gender) + age + (univ_career)  
                 + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , qualt_table, vcov = "hetero")

# Regression 7
reg_7_q <-  feols(Y ~ Sug_Corr*Precio + Precio*Exper + Precio*Rapidez + Precio*GestLink,
                  qualt_table, vcov = "hetero")

# Regression 8 
reg_8_q <- feols(Y ~ Sug_Corr*Precio + Precio*Exper + Precio*Rapidez + Precio*GestLink + (gender) +
                    age + (univ_career) + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                    (m_house_income) , qualt_table, vcov = "hetero")

# Table 2
tab_2_q <- huxreg("(1)" = reg_5_q, "(2)" = reg_6_q, "(3)" = reg_7_q, "(4)" = reg_8_q,
                statistics = c("N. obs." = "nobs", 
                               "R squared" = "r.squared", 
                               "F statistic" = "statistic",
                               "P value" = "p.value",
                               'RMSE' = "rmse"), 
                coefs = c("(Intercept)", 
                          "Corruption" = "Sug_Corr", 
                          "Price" = "Precio", 
                          "Corruption x Price" = "Sug_Corr:Precio"),
                stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9)%>% 
   
   huxtable::insert_row(c("Other treatments", "No", "No", "Yes", "Yes"), after = 10) 

print(tab_2_q)


# Table A.7 - Effect of corruption suggestions and trusted reference - Qualtrics’ Quality Filter Sample --------

# Regression 9
reg_9_q <- feols(Y ~ Sug_Corr*GestLink, qualt_table, vcov = "hetero")

# Regression 10
reg_10_q <- feols(Y ~ Sug_Corr*GestLink + (gender) + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_table, vcov = "hetero")

# Regression 11
reg_11_q <-  feols(Y ~ Sug_Corr*GestLink + Sug_Corr*Precio  + Sug_Corr*Rapidez + Sug_Corr*Exper , qualt_table, vcov = "hetero")

# Regression 12
reg_12_q <- feols(Y ~ Sug_Corr*GestLink + Sug_Corr*Precio  + Sug_Corr*Rapidez + Sug_Corr*Exper +gender + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_table, vcov = "hetero")

tab_3_q <- huxreg("(1)" =reg_9_q, "(2)" =reg_10_q, "(3)" =reg_11_q, "(4)" =reg_12_q,
                statistics = c("N. obs." = "nobs", 
                               "R squared" = "r.squared", "F statistic" = "statistic",
                               "P value" = "p.value"), 
                coefs = c("(Intercept)", 
                          'Corruption' = "Sug_Corr",
                          "Reference"="GestLink", 
                          "Corruption x Reference" = "Sug_Corr:GestLink"), 
                stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 11) %>% 
   huxtable::insert_row(c("Other treatments x Corruption", "No", "No", "Yes", "Yes"), after = 11)

print(tab_3_q)

# Table A.8 - Heterogeneity in the effect of corruption suggestions in career stage - Qualtrics’ Quality Filter Sample --------

reg_1_h <- feols(Y ~ Sug_Corr*(current_semest), qualt_heter_table, vcov = "hetero")

reg_2_h <- feols(Y ~ Sug_Corr*(current_semest) + age + (gender)  
                  + as.factor(univ_career) + (univ_start_yr) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_heter_table, vcov = "hetero")

reg_3_h <-  feols(Y ~ Sug_Corr*(current_semest) + (current_semest)*Precio  + (current_semest)*Rapidez + (current_semest)*Exper +  (current_semest)*GestLink, 
                  qualt_heter_table, vcov = "hetero")

reg_4_h <- feols(Y ~ Sug_Corr*(current_semest) + (current_semest)*Precio  + (current_semest)*Rapidez + (current_semest)*Exper +  (current_semest)*GestLink+ age + (gender)  
                  + as.factor(univ_career) + (univ_start_yr) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_heter_table, vcov = "hetero")


tab_h <- huxreg(reg_1_h, reg_2_h, reg_3_h, reg_4_h,
                  statistics = c("N. obs." = "nobs", 
                                 "R squared" = "r.squared", "F statistic" = "statistic",
                                 "P value" = "p.value"), 
                  coefs = c("(Intercept)", 
                            "Corruption" = "Sug_Corr", 
                            "Late Stage" = "current_semest", 
                            "Corruption x Late Stage" = "Sug_Corr:current_semest"),
                  stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9) %>% 
   huxtable::insert_row(c("Other treatments x Late Stage", "No", "No", "Yes", "Yes"), after = 10)

print(tab_h)


# Table A.9 -  Demand price elasticity and intermediaries referred by trusted individuals - Qualtrics’ Quality Filter Sample --------

reg_1_sq <- feols(Y ~ GestLink*Precio, qualt_table, vcov = "hetero")

reg_2_sq <- feols(Y ~ GestLink*Precio + (gender) + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_table, vcov = "hetero")

reg_3_sq <-  feols(Y ~ GestLink*Precio + GestLink*Exper + GestLink*Rapidez + GestLink*Sug_Corr , qualt_table, vcov = "hetero")

reg_4_sq <- feols(Y ~ GestLink*Precio + GestLink*Exper + GestLink*Rapidez + GestLink*Sug_Corr + (gender) + age + (univ_career)  
                  + (univ_start_yr) + (current_semest) + (acad_rank) + (income_source) + 
                     (m_house_income) , qualt_table, vcov = "hetero")

tab_sq <- huxreg(reg_1_sq, reg_2_sq, reg_3_sq, reg_4_sq,
                statistics = c("N. obs." = "nobs", 
                               "R squared" = "r.squared", 
                               "F statistic" = "statistic",
                               "P value" = "p.value"), 
                coefs = c("(Intercept)", 
                          "Price" = "Precio", 
                          "Reference" ="GestLink",
                          "Price x Reference" = "GestLink:Precio"),
                stars = c(`*` = 0.1, `**` = 0.05, `***` = 0.01)) %>% 
   huxtable::insert_row(c("Covariates","No","Yes","No", "Yes"),after = 9)%>% 
   huxtable::insert_row(c("Other treatments", "No", "No", "Yes", "Yes"), after = 10)

print(tab_sq)


# EXPORT  -----------------------------------------------------------------

list_reg_tables <- list('Table 1' = tab_1, 
                        'Table 2' = tab_2, 
                        'Table 3' = tab_3, 
                        'Table 4' = tab_4, 
                        'Table A.5' = tab_1_q, 
                        'Table A.6' = tab_2_q, 
                        'Table A.7' = tab_3_q, 
                        'Table A.8' = tab_h, 
                        'Table A.9' = tab_sq)

write_xlsx(list_reg_tables, 'Data/Output/Tables/02_regression_tables.xlsx')

### ----------------------------------------------------------------------

rm(list = setdiff(ls(), c('tabla_survey', 'tab_1', 'tab_2', 'tab_3', 'tab_4', 
                  'tab_1_q', 'tab_2_q', 'tab_3_q', 'tab_h', 'tab_sq', 
                  'reg_1_c', 'reg_5_c', 'reg_9_c', 'reg_1_s', 'reg_1_h')))

